library(ggpubr)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 methods overwritten by 'car':
method from
influence.merMod lme4
cooks.distance.influence.merMod lme4
dfbeta.influence.merMod lme4
dfbetas.influence.merMod lme4
response <- try(system('~/google-cloud-sdk/bin/gcloud projects list --quiet', intern = T))
projectid <- strsplit(response[2], " ")[[1]][1]
options(na.action = "na.fail")
source("./dredge_functions.R")
dredge_and_subset <- function(data) {
model <- lm(
response ~ foraging_niche + trophic_niche + is_nocturnal + pc1 + pc2 + pc3 + pc4 + total_species,
data=data
)
dredge_result <- dredge(model)
summary(model.avg(dredge_result, subset = delta < 10))
}
load_niche_data <- function() {
filename <- 'species_analysis_cache.csv'
if (!file.exists(filename)) {
sql <- "
SELECT
niche_name,
foraging_niche,
trophic_niche,
is_noctural,
total_species,
merlin_10_perc_ratio,
merlin_20_perc_ratio,
merlin_ever_perc_ratio,
birdlife_10_perc_ratio,
birdlife_20_perc_ratio,
birdlife_ever_perc_ratio,
either_10_perc_ratio,
either_20_perc_ratio,
either_ever_perc_ratio,
both_10_perc_ratio,
both_20_perc_ratio,
both_ever_perc_ratio,
body_morphspace.pc1.mean AS pc1,
body_morphspace.pc2.mean AS pc2,
body_morphspace.pc3.mean AS pc3,
body_morphspace.pc4.mean AS pc4
FROM `endless-matter-297214.model2.niche_analysis_model` LIMIT 1000
"
tb <- bq_project_query(projectid, sql)
data <- bq_table_download(tb)
write_csv(data, filename)
}
data <- read_csv(filename)
data[is.na(data$foraging_niche),]$foraging_niche <- 'Omnivore'
data$foraging_niche = as.factor(data$foraging_niche)
data$trophic_niche = as.factor(data$trophic_niche)
data$is_nocturnal = as.factor(data$is_noctural)
data
}
data_for_response <- function(column_name_for_response) {
data <- load_niche_data()
names(data)[names(data) == column_name_for_response] <- "response"
data[,c("niche_name", "response", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}
| 0.25 Percentile - 10% of species |
merlin_10_data <- data_for_response('merlin_10_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
merlin_10_data
merlin_10_result <- dredge_and_subset(merlin_10_data)
Fixed term is "(Intercept)"
merlin_10_summ <- model_summary('merlin_10', 'species', merlin_10_result)
merlin_10_summ
birdlife_10_data <- data_for_response('birdlife_10_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
birdlife_10_result <- dredge_and_subset(birdlife_10_data)
Fixed term is "(Intercept)"
birdlife_10_summ <- model_summary('birdlife_10', 'species', birdlife_10_result)
birdlife_10_summ
both_10_data <- data_for_response('both_10_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
both_10_result <- dredge_and_subset(both_10_data)
Fixed term is "(Intercept)"
both_10_summ <- model_summary('both_10', 'species', both_10_result)
both_10_summ
either_10_data <- data_for_response('either_10_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
either_10_result <- dredge_and_subset(either_10_data)
Fixed term is "(Intercept)"
either_10_summ <- model_summary('either_10', 'species', either_10_result)
either_10_summ
| Full result for 10% of species |
all_species_results <- full_join(full_join(merlin_10_summ, birdlife_10_summ), full_join(both_10_summ, either_10_summ))
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
write_csv(all_species_results, "species_analysis_result_10_perc.csv")
| 0.75 Percentile - 20% of species |
merlin_20_data <- data_for_response('merlin_20_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
merlin_20_result <- dredge_and_subset(merlin_20_data)
Fixed term is "(Intercept)"
merlin_20_summ <- model_summary('merlin_20', 'species', merlin_20_result)
merlin_20_summ
birdlife_20_data <- data_for_response('birdlife_20_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
birdlife_20_result <- dredge_and_subset(birdlife_20_data)
Fixed term is "(Intercept)"
birdlife_20_summ <- model_summary('birdlife_20', 'species', birdlife_20_result)
birdlife_20_summ
both_20_data <- data_for_response('both_20_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
both_20_result <- dredge_and_subset(both_20_data)
Fixed term is "(Intercept)"
both_20_summ <- model_summary('both_20', 'species', both_20_result)
both_20_summ
either_20_data <- data_for_response('either_20_perc_ratio')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
either_20_result <- dredge_and_subset(either_20_data)
Fixed term is "(Intercept)"
either_20_summ <- model_summary('either_20', 'species', either_20_result)
either_20_summ
| Full result for 10% of species |
all_species_results <- full_join(full_join(merlin_20_summ, birdlife_20_summ), full_join(both_20_summ, either_20_summ))
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
Joining, by = c("explanatory", "model")
write_csv(all_species_results, "species_analysis_result_20_perc.csv")
| Comparing the 2 percentiles |
ggplot(merlin_10_data, aes(x = response)) + geom_bar(width = 0.1)
Warning: position_stack requires non-overlapping x intervals

bind_sets <- function(first_percentile, second_percentile) {
first_percentile$percentile <- "Top 10%"
second_percentile$percentile <- "Top 20%"
rbind(first_percentile, second_percentile)
}
merlin_species_data <- bind_sets(merlin_10_data, merlin_20_data)
birdlife_species_data <- bind_sets(birdlife_10_data, birdlife_20_data)
either_species_data <- bind_sets(either_10_data, either_20_data)
both_species_data <- bind_sets(both_10_data, both_20_data)
diff_set <- function(first_percentile, second_percentile) {
second_percentile$response_20 <- second_percentile$response
result <- right_join(first_percentile, second_percentile[,c("niche_name", "response_20")], by = c("niche_name"))
result$diff <- result$response_20 - result$response
result$increase <- result$diff / result$response_20
result$increase[is.na(result$increase)] = 0
result$response_10 <- result$response
result[,c("response_10", "response_20", "diff", "increase", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}
merlin_diff_data <- diff_set(merlin_10_data, merlin_20_data)
birdlife_diff_data <- diff_set(birdlife_10_data, birdlife_20_data)
either_diff_data <- diff_set(either_10_data, either_20_data)
both_diff_data <- diff_set(both_10_data, both_20_data)
merlin_diff_data
ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

merlin_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche 9 1.762 0.1958 1.679 0.0943 .
Residuals 258 30.084 0.1166
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.508 1.5077 59.65 2.29e-13 ***
Residuals 267 6.749 0.0253
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova_i)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche 9 1.762 0.1958 1.679 0.0943 .
Residuals 258 30.084 0.1166
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.508 1.5077 69.821 4.05e-15 ***
trophic_niche:percentile 9 1.177 0.1308 6.057 1.03e-07 ***
Residuals 258 5.571 0.0216
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
pairwise.wilcox.test(merlin_diff_data$increase, merlin_diff_data$trophic_niche)
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Warning in wilcox.test.default(xi, xj, paired = paired, ...) :
cannot compute exact p-value with ties
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: merlin_diff_data$increase and merlin_diff_data$trophic_niche
Aquatic predator Frugivore Granivore Herbivore aquatic Herbivore terrestrial Invertivore Nectarivore Omnivore Scavenger
Frugivore 0.5331 - - - - - - - -
Granivore 1.0000 1.0000 - - - - - - -
Herbivore aquatic 1.0000 0.1167 1.0000 - - - - - -
Herbivore terrestrial 1.0000 1.0000 1.0000 1.0000 - - - - -
Invertivore 0.0076 1.0000 1.0000 0.0051 1.0000 - - - -
Nectarivore 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 - - -
Omnivore 0.2684 1.0000 1.0000 0.0679 1.0000 1.0000 1.0000 - -
Scavenger 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 -
Vertivore 1.0000 1.0000 1.0000 0.5929 1.0000 1.0000 1.0000 1.0000 1.0000
P value adjustment method: holm
library(multcomp)
merlin_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=merlin_diff_data)
summary(merlin_increase_tropic_niche_anova)
Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche 9 3.941 0.4379 3.689 0.000229 ***
Residuals 258 30.629 0.1187
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_increase_tropic_niche_tukey <- cld(glht(merlin_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
merlin_increase_tropic_niche_tukey
Aquatic predator Frugivore Granivore Herbivore aquatic Herbivore terrestrial Invertivore Nectarivore Omnivore
"bc" "ab" "ac" "c" "ac" "a" "ac" "ab"
Scavenger Vertivore
"ac" "ac"
plot(merlin_increase_tropic_niche_tukey)

with.tukey.label.as.group <- function(tukey, dataset, join_by) {
labels <- data.frame(tukey$mcletters$Letters)
labels$category <- rownames(labels)
colnames(labels) <- c("group", "category")
left_join(dataset, labels, by = join_by)
}
merlin_diff_data1 <- with.tukey.label.as.group(merlin_increase_tropic_niche_tukey, merlin_diff_data, c("trophic_niche" = "category"))
merlin_diff_data1
table(merlin_diff_data1$trophic_niche)
Aquatic predator Frugivore Granivore Herbivore aquatic Herbivore terrestrial Invertivore Nectarivore Omnivore
58 25 10 15 9 74 11 34
Scavenger Vertivore
3 29
ggplot(merlin_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()

ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche 9 1.696 0.1885 1.605 0.114
Residuals 258 30.292 0.1174
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.327 1.3274 60.18 1.84e-13 ***
Residuals 267 5.890 0.0221
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova_i)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche 9 1.696 0.1885 1.605 0.114
Residuals 258 30.292 0.1174
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.327 1.3274 64.018 4.19e-14 ***
trophic_niche:percentile 9 0.540 0.0600 2.893 0.00282 **
Residuals 258 5.350 0.0207
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=birdlife_diff_data)
summary(birdlife_increase_tropic_niche_anova)
Df Sum Sq Mean Sq F value Pr(>F)
trophic_niche 9 2.21 0.2453 1.791 0.0703 .
Residuals 258 35.35 0.1370
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_increase_tropic_niche_tukey <- cld(glht(birdlife_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
Warning in RET$pfunction("adjusted", ...) :
Completion with error > abseps
birdlife_increase_tropic_niche_tukey
Aquatic predator Frugivore Granivore Herbivore aquatic Herbivore terrestrial Invertivore Nectarivore Omnivore
"b" "ab" "ab" "ab" "ab" "a" "ab" "ab"
Scavenger Vertivore
"ab" "ab"
birdlife_diff_data1 <- with.tukey.label.as.group(birdlife_increase_tropic_niche_tukey, birdlife_diff_data, c("trophic_niche" = "category"))
ggplot(birdlife_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()

ggplot(either_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

ggplot(both_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()

nrow(merlin_10_data[merlin_10_data$response > 0,])
[1] 96
nrow(merlin_20_data[merlin_20_data$response > 0,])
[1] 128
nrow(birdlife_10_data[birdlife_10_data$response > 0,])
[1] 87
nrow(birdlife_20_data[birdlife_20_data$response > 0,])
[1] 124
nrow(either_10_data[either_10_data$response > 0,])
[1] 94
nrow(either_20_data[either_20_data$response > 0,])
[1] 123
nrow(both_10_data[both_10_data$response > 0,])
[1] 91
nrow(both_20_data[both_20_data$response > 0,])
[1] 128
ggplot(merlin_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()

interaction.plot(merlin_species_data$foraging_niche, merlin_species_data$percentile, merlin_species_data$response)

merlin_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche 32 4.894 0.1529 1.333 0.118
Residuals 235 26.952 0.1147
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.508 1.5077 59.65 2.29e-13 ***
Residuals 267 6.749 0.0253
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova_i)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche 32 4.894 0.1529 1.333 0.118
Residuals 235 26.952 0.1147
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.508 1.5077 68.706 8.81e-15 ***
foraging_niche:percentile 32 1.591 0.0497 2.266 0.000275 ***
Residuals 235 5.157 0.0219
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
ggplot(birdlife_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche 32 5.519 0.1725 1.531 0.0401 *
Residuals 235 26.470 0.1126
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.327 1.3274 60.18 1.84e-13 ***
Residuals 267 5.890 0.0221
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova_i)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
foraging_niche 32 5.519 0.1725 1.531 0.0401 *
Residuals 235 26.470 0.1126
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.327 1.3274 67.870 1.22e-14 ***
foraging_niche:percentile 32 1.293 0.0404 2.066 0.00118 **
Residuals 235 4.596 0.0196
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
pc_axis_figure <- function(dataset) {
annotate_figure(
ggarrange(
ggplot(dataset, aes(x = pc1, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
ggplot(dataset, aes(x = pc2, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
ggplot(dataset, aes(x = pc3, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
ggplot(dataset, aes(x = pc4, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
nrow = 2, ncol = 2, common.legend = T, label.x = 0),
left = text_grob("log(response)", rot = 90))
}
pc_axis_figure(merlin_species_data)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).


library(lme4)
merlin_pc1_model = lmer(response ~ pc1 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc1_model)
Analysis of Variance Table
npar Sum Sq Mean Sq F value
pc1 1 0.00042 0.00042 0.0169
percentile 1 1.50774 1.50774 59.9817
pc1:percentile 1 0.06215 0.06215 2.4724
summary(merlin_pc1_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc1 * percentile + (1 | niche_name)
Data: merlin_species_data
REML criterion at convergence: -8.6
Scaled residuals:
Min 1Q Median 3Q Max
-2.4461 -0.5197 0.0155 0.1840 3.3433
Random effects:
Groups Name Variance Std.Dev.
niche_name (Intercept) 0.04729 0.2175
Residual 0.02514 0.1585
Number of obs: 536, groups: niche_name, 268
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.117739 0.022322 5.275
pc1 -0.005011 0.006481 -0.773
percentileTop 20% 0.086293 0.018597 4.640
pc1:percentileTop 20% 0.008490 0.005399 1.572
Correlation of Fixed Effects:
(Intr) pc1 prT20%
pc1 -0.676
prcntlTp20% -0.417 0.282
pc1:prcT20% 0.282 -0.417 -0.676
merlin_pc2_model = lmer(response ~ pc2 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc2_model)
Analysis of Variance Table
npar Sum Sq Mean Sq F value
pc2 1 0.01486 0.01486 0.5864
percentile 1 1.50774 1.50774 59.5009
pc2:percentile 1 0.00812 0.00812 0.3203
summary(merlin_pc2_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc2 * percentile + (1 | niche_name)
Data: merlin_species_data
REML criterion at convergence: -11.2
Scaled residuals:
Min 1Q Median 3Q Max
-2.3691 -0.5150 0.0879 0.1354 3.3292
Random effects:
Groups Name Variance Std.Dev.
niche_name (Intercept) 0.04706 0.2169
Residual 0.02534 0.1592
Number of obs: 536, groups: niche_name, 268
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.104389 0.016836 6.200
pc2 -0.008532 0.018597 -0.459
percentileTop 20% 0.104347 0.014086 7.408
pc2:percentileTop 20% -0.008805 0.015559 -0.566
Correlation of Fixed Effects:
(Intr) pc2 prT20%
pc2 0.217
prcntlTp20% -0.418 -0.091
pc2:prcT20% -0.091 -0.418 0.217
merlin_pc3_model = lmer(response ~ pc3 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc3_model)
Analysis of Variance Table
npar Sum Sq Mean Sq F value
pc3 1 0.00044 0.00044 0.0187
percentile 1 1.50774 1.50774 63.7963
pc3:percentile 1 0.46195 0.46195 19.5464
summary(merlin_pc3_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc3 * percentile + (1 | niche_name)
Data: merlin_species_data
REML criterion at convergence: -30.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.8609 -0.4057 -0.0338 0.2719 3.7473
Random effects:
Groups Name Variance Std.Dev.
niche_name (Intercept) 0.04804 0.2192
Residual 0.02363 0.1537
Number of obs: 536, groups: niche_name, 268
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.10758 0.01638 6.568
pc3 -0.04333 0.02594 -1.670
percentileTop 20% 0.10282 0.01330 7.730
pc3:percentileTop 20% 0.09314 0.02107 4.421
Correlation of Fixed Effects:
(Intr) pc3 prT20%
pc3 -0.055
prcntlTp20% -0.406 0.022
pc3:prcT20% 0.022 -0.406 -0.055
merlin_pc4_model = lmer(response ~ pc4 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc4_model)
Analysis of Variance Table
npar Sum Sq Mean Sq F value
pc4 1 0.06576 0.06576 2.6061
percentile 1 1.50774 1.50774 59.7558
pc4:percentile 1 0.03687 0.03687 1.4612
summary(merlin_pc4_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ pc4 * percentile + (1 | niche_name)
Data: merlin_species_data
REML criterion at convergence: -17
Scaled residuals:
Min 1Q Median 3Q Max
-2.4101 -0.5034 0.0731 0.1400 3.4938
Random effects:
Groups Name Variance Std.Dev.
niche_name (Intercept) 0.04666 0.2160
Residual 0.02523 0.1588
Number of obs: 536, groups: niche_name, 268
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.10678 0.01640 6.513
pc4 0.03400 0.03543 0.960
percentileTop 20% 0.10683 0.01374 7.777
pc4:percentileTop 20% 0.03588 0.02969 1.209
Correlation of Fixed Effects:
(Intr) pc4 prT20%
pc4 0.046
prcntlTp20% -0.419 -0.019
pc4:prcT20% -0.019 -0.419 0.046
pc_axis_figure(birdlife_species_data)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 325 rows containing non-finite values (stat_smooth).


ggplot(merlin_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()

merlin_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal 1 0.462 0.4615 3.912 0.049 *
Residuals 266 31.384 0.1180
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.508 1.5077 59.65 2.29e-13 ***
Residuals 267 6.749 0.0253
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
merlin_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova_i)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal 1 0.462 0.4615 3.912 0.049 *
Residuals 266 31.384 0.1180
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.508 1.5077 60.685 1.51e-13 ***
is_nocturnal:percentile 1 0.140 0.1396 5.618 0.0185 *
Residuals 266 6.609 0.0248
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
ggplot(birdlife_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()

birdlife_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal 1 0.20 0.1982 1.658 0.199
Residuals 266 31.79 0.1195
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.327 1.3274 60.18 1.84e-13 ***
Residuals 267 5.890 0.0221
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
birdlife_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova_i)
Error: niche_name
Df Sum Sq Mean Sq F value Pr(>F)
is_nocturnal 1 0.20 0.1982 1.658 0.199
Residuals 266 31.79 0.1195
Error: Within
Df Sum Sq Mean Sq F value Pr(>F)
percentile 1 1.327 1.3274 60.370 1.72e-13 ***
is_nocturnal:percentile 1 0.041 0.0407 1.852 0.175
Residuals 266 5.849 0.0220
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
| Investigation of percentiles and presence |
ggplot(merlin_species_data, aes(x = total_species, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw()
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 312 rows containing non-finite values (stat_smooth).

merlin_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_species_count_model)
Analysis of Variance Table
npar Sum Sq Mean Sq F value
total_species 1 0.00052 0.00052 0.0206
percentile 1 1.50774 1.50774 59.4316
total_species:percentile 1 0.00026 0.00026 0.0103
summary(merlin_species_count_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ total_species * percentile + (1 | niche_name)
Data: merlin_species_data
REML criterion at convergence: 8.6
Scaled residuals:
Min 1Q Median 3Q Max
-2.3523 -0.5462 0.1109 0.1216 3.2582
Random effects:
Groups Name Variance Std.Dev.
niche_name (Intercept) 0.04717 0.2172
Residual 0.02537 0.1593
Number of obs: 536, groups: niche_name, 268
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.064e-01 1.690e-02 6.295
total_species -1.420e-05 1.612e-04 -0.088
percentileTop 20% 1.064e-01 1.414e-02 7.527
total_species:percentileTop 20% -1.367e-05 1.348e-04 -0.101
Correlation of Fixed Effects:
(Intr) ttl_sp prT20%
total_specs -0.229
prcntlTp20% -0.418 0.096
ttl_sp:T20% 0.096 -0.418 -0.229
birdlife_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=birdlife_species_data)
anova(birdlife_species_count_model)
Analysis of Variance Table
npar Sum Sq Mean Sq F value
total_species 1 0.0000 0.0000 0.0001
percentile 1 1.3274 1.3274 59.9531
total_species:percentile 1 0.0000 0.0000 0.0001
summary(birdlife_species_count_model)
Linear mixed model fit by REML ['lmerMod']
Formula: response ~ total_species * percentile + (1 | niche_name)
Data: birdlife_species_data
REML criterion at convergence: -26.3
Scaled residuals:
Min 1Q Median 3Q Max
-2.5919 -0.5189 0.0633 0.1497 3.4600
Random effects:
Groups Name Variance Std.Dev.
niche_name (Intercept) 0.04906 0.2215
Residual 0.02214 0.1488
Number of obs: 536, groups: niche_name, 268
Fixed effects:
Estimate Std. Error t value
(Intercept) 9.946e-02 1.675e-02 5.939
total_species 9.392e-07 1.597e-04 0.006
percentileTop 20% 9.949e-02 1.321e-02 7.534
total_species:percentileTop 20% 1.518e-06 1.260e-04 0.012
Correlation of Fixed Effects:
(Intr) ttl_sp prT20%
total_specs -0.229
prcntlTp20% -0.394 0.090
ttl_sp:T20% 0.090 -0.394 -0.229
merlin_species_data$present <- merlin_species_data$response > 0

merlin_present_model = lmer(present ~ log(total_species) * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_present_model)
Analysis of Variance Table
npar Sum Sq Mean Sq F value
log(total_species) 1 7.6804 7.6804 145.5124
percentile 1 1.9104 1.9104 36.1952
log(total_species):percentile 1 0.0496 0.0496 0.9393
summary(merlin_present_model)
Linear mixed model fit by REML ['lmerMod']
Formula: present ~ log(total_species) * percentile + (1 | niche_name)
Data: merlin_species_data
REML criterion at convergence: 410.7
Scaled residuals:
Min 1Q Median 3Q Max
-2.25242 -0.44048 0.02734 0.24296 2.14911
Random effects:
Groups Name Variance Std.Dev.
niche_name (Intercept) 0.11259 0.3356
Residual 0.05278 0.2297
Number of obs: 536, groups: niche_name, 268
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.10521 0.03325 3.164
log(total_species) 0.19281 0.01685 11.446
percentileTop 20% 0.13652 0.02657 5.139
log(total_species):percentileTop 20% -0.01304 0.01346 -0.969
Correlation of Fixed Effects:
(Intr) lg(t_) prT20%
lg(ttl_spc) -0.665
prcntlTp20% -0.399 0.266
lg(t_):T20% 0.266 -0.399 -0.665
assign_niche_appearance_factor <- function(dataset) {
dataset$niche_appearance = '> 20%'
dataset$niche_appearance[dataset$response_20 > 0] = '10% - 20%'
dataset$niche_appearance[dataset$response_10 > 0] = '< 10%'
dataset$niche_appearance <- factor(dataset$niche_appearance, levels = c("< 10%", "10% - 20%", "> 20%"))
dataset
}
merlin_diff_data <- assign_niche_appearance_factor(merlin_diff_data)
ggplot(merlin_diff_data, aes(fill=niche_appearance, y=trophic_niche)) +
geom_bar()

load_ever_data <- function(pool) {
response_name <- paste(pool, 'ever', 'perc', 'ratio', sep = '_')
dataset <- data_for_response(response_name)
dataset$present <- 'Yes'
dataset$present[dataset$response == 0] <- 'No'
dataset$present <- factor(dataset$present, levels = c("Yes", "No"))
dataset
}
merlin_ever_data <- load_ever_data('merlin')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
merlin_ever_data
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) +
geom_bar()

birdlife_diff_data <- assign_niche_appearance_factor(birdlife_diff_data)
ggplot(birdlife_diff_data, aes(fill=niche_appearance, y=trophic_niche)) +
geom_bar()

birdlife_ever_data <- load_ever_data('birdlife')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
niche_name = col_character(),
foraging_niche = col_character(),
trophic_niche = col_character(),
is_noctural = col_logical()
)
ℹ Use `spec()` for the full column specifications.
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) +
geom_bar()

| PCA Variation with presence |
ggplot(merlin_ever_data, aes(color = present, x = pc1, y = trophic_niche)) +
geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Merlin Regional Pool")
ggsave("species_analysis_pc1_merlin.jpg")
Saving 7.29 x 4.51 in image

ggplot(merlin_diff_data, aes(color = niche_appearance, x = pc1, y = trophic_niche)) +
geom_violin()
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.




ggplot(birdlife_ever_data, aes(color = present, x = pc1, y = trophic_niche)) +
geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Birdlife Regional Pool")
ggsave("species_analysis_pc1_birdlife.jpg")

- Niches recorded present in at least one city.
- Given species ranked by percentage of cities populated given they are present in regional pool; This shows niches present with the top 10% of the most urbanised species (red), the top 20% of the most urbanised species (red + green), and then the remaining niches (blue). This approximates to the niches that would be present in the bottom 25% performing hotspots (red), the bottom 75% performing hotspots (red + green), and then the niches at the highest performing hotspots or niches that are never present (blue).

- The redundancy/fullness of the niches that are present in cities with 10% of the most urbanised species, and the redundancy/fullness of the niches that are present in cities with 20% of the most urbanised species.
- The percentage increase in redundancy/fullness of niches between the top 10% of most urbanised species and the top 20% of most urbanised species, grouped using Tukey based on an anova of the percentage increase given the tropic niche. This shows the amount of redundancy that is increased from the bottom 25% performing hotspots (hotspots with 10% of regional pool) and the bottom 75% of performing hotspots (hotspots with 20% of regional pool).
jpeg("species_plot1.jpg", width = 800, height = 600)
species_plot1
dev.off()
jpeg("species_plot2.jpg", width = 800, height = 600)
species_plot2
dev.off()
| Trophic Niche Accumulation |
merlin_accumulation = ggplot(accumulation, aes(x = percent, y = merlin_niche_count)) +
geom_line(linetype = "dotted") +
geom_line(aes(linetype = merlin_remaining_species), na.value = "dash") +
scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
geom_hline(data = . %>% group_by(trophic_niche) %>% filter(merlin_niche_count == max(merlin_niche_count)), aes(yintercept = merlin_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
theme_bw() +
theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6), axis.text.y = element_text(size = 6)) +
facet_wrap (~ trophic_niche) +
xlab("Percentage of all Species") +
ylab("Number of Niches") +
labs(title = "Merlin Niche Accumulation")
Warning: Ignoring unknown parameters: na.value
merlin_accumulation

birdlife_accumulation = ggplot(accumulation, aes(x = percent, y = birdlife_niche_count)) +
geom_line(linetype = "dotted") +
geom_line(aes(linetype = birdlife_remaining_species), na.value = "dash") +
scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
geom_hline(data = . %>% group_by(trophic_niche) %>% filter(birdlife_niche_count == max(birdlife_niche_count)), aes(yintercept = birdlife_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
theme_bw() +
theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6), axis.text.y = element_text(size = 6)) +
facet_wrap (~ trophic_niche) +
xlab("Percentage of all Species") +
ylab("Number of Niches") +
labs(title = "Birdlife Niche Accumulation")
Warning: Ignoring unknown parameters: na.value
birdlife_accumulation

annotate_figure(
ggarrange(merlin_accumulation + rremove("xlab") + rremove("ylab"),
birdlife_accumulation + rremove("xlab") + rremove("ylab"),
ncol = 2, label.x = 0),
left = text_grob("Number of Niches Present", rot = 90),
bottom = text_grob("Percentage of all Species - ranked by urbanisation"))
ggsave("trophic_niche_accumlation.jpg")
Saving 7.29 x 4.51 in image

ggplot(accumulation, aes(x = percent, y = birdlife_niche_count)) +
geom_line(linetype = "dotted") +
geom_line(aes(linetype = birdlife_remaining_species), na.value = "dash") +
scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
geom_hline(data = . %>% group_by(trophic_niche) %>% filter(birdlife_niche_count == max(birdlife_niche_count)), aes(yintercept = birdlife_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
theme_bw() +
theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6), axis.text.y = element_text(size = 6)) +
facet_wrap (~ factor(trophic_niche, levels = c("Invertivore", "Aquatic predator", "Omnivore", "Vertivore", "Frugivore", "Herbivore aquatic", "Granivore", "Herbivore terrestrial", "Nectarivore", "Scavenger")), scales = "free") +
xlab("Percentage of all Species") +
ylab("Number of Niches") +
labs(title = "Birdlife Niche Accumulation") +
xlim(0, 50)
Warning: Ignoring unknown parameters: na.value
Warning: Removed 10 row(s) containing missing values (geom_path).
Warning: Removed 10 row(s) containing missing values (geom_path).

---
title: "R Notebook"
output: html_notebook
---
```{r}
library(stringr)
library(tidyverse)
library(bigrquery)
library(MuMIn)
library(ggplot2)

library(ggpubr)
```

```{r}
response <- try(system('~/google-cloud-sdk/bin/gcloud projects list --quiet', intern = T))
projectid <- strsplit(response[2], " ")[[1]][1]
```

```{r}
options(na.action = "na.fail") 
```

```{r}
source("./dredge_functions.R")
```

```{r}
dredge_and_subset <- function(data) {
  model <- lm(
    response ~ foraging_niche + trophic_niche + is_nocturnal + pc1 + pc2 + pc3 + pc4 + total_species,
    data=data
  )
  dredge_result <- dredge(model)
  summary(model.avg(dredge_result, subset = delta < 10))
}
```

```{r}
load_niche_data <- function() {
  filename <- 'species_analysis_cache.csv'
  
  if (!file.exists(filename)) {
    
    sql <- "
        SELECT 
          niche_name,
          foraging_niche, 
          trophic_niche, 
          is_noctural, 
          total_species,
          merlin_10_perc_ratio, 
          merlin_20_perc_ratio,
          merlin_ever_perc_ratio,
          birdlife_10_perc_ratio, 
          birdlife_20_perc_ratio,
          birdlife_ever_perc_ratio,
          either_10_perc_ratio, 
          either_20_perc_ratio, 
          either_ever_perc_ratio,
          both_10_perc_ratio, 
          both_20_perc_ratio,
          both_ever_perc_ratio,
          body_morphspace.pc1.mean AS pc1, 
          body_morphspace.pc2.mean AS pc2, 
          body_morphspace.pc3.mean AS pc3, 
          body_morphspace.pc4.mean AS pc4
        FROM `endless-matter-297214.model2.niche_analysis_model` LIMIT 1000 
    "
  
    tb <- bq_project_query(projectid, sql)

    data <- bq_table_download(tb)
    write_csv(data, filename)
  }
  
  data <- read_csv(filename)
  
  data[is.na(data$foraging_niche),]$foraging_niche <- 'Omnivore'
  
  data$foraging_niche = as.factor(data$foraging_niche)
  data$trophic_niche = as.factor(data$trophic_niche)
  data$is_nocturnal = as.factor(data$is_noctural)
  
  data
}

data_for_response <- function(column_name_for_response) {
  data <- load_niche_data()
  names(data)[names(data) == column_name_for_response] <- "response"
  
  data[,c("niche_name", "response", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}
```

---------------------------------
0.25 Percentile - 10% of species
---------------------------------

```{r}
merlin_10_data <- data_for_response('merlin_10_perc_ratio')
merlin_10_data
```


```{r}
merlin_10_result <- dredge_and_subset(merlin_10_data)
merlin_10_summ <- model_summary('merlin_10', 'species', merlin_10_result)
merlin_10_summ
```

```{r}
birdlife_10_data <- data_for_response('birdlife_10_perc_ratio')
birdlife_10_result <- dredge_and_subset(birdlife_10_data)
birdlife_10_summ <- model_summary('birdlife_10', 'species', birdlife_10_result)
birdlife_10_summ
```


```{r}
both_10_data <- data_for_response('both_10_perc_ratio')
both_10_result <- dredge_and_subset(both_10_data)
both_10_summ <- model_summary('both_10', 'species', both_10_result)
both_10_summ
```


```{r}
either_10_data <- data_for_response('either_10_perc_ratio')
either_10_result <- dredge_and_subset(either_10_data)
either_10_summ <- model_summary('either_10', 'species', either_10_result)
either_10_summ
```

------------------------------
Full result for 10% of species
------------------------------
```{r}
all_species_results <- full_join(full_join(merlin_10_summ, birdlife_10_summ), full_join(both_10_summ, either_10_summ))
write_csv(all_species_results, "species_analysis_result_10_perc.csv")
```

---------------------------------
0.75 Percentile - 20% of species
---------------------------------

```{r}
merlin_20_data <- data_for_response('merlin_20_perc_ratio')
merlin_20_result <- dredge_and_subset(merlin_20_data)
merlin_20_summ <- model_summary('merlin_20', 'species', merlin_20_result)
merlin_20_summ
```

```{r}
birdlife_20_data <- data_for_response('birdlife_20_perc_ratio')
birdlife_20_result <- dredge_and_subset(birdlife_20_data)
birdlife_20_summ <- model_summary('birdlife_20', 'species', birdlife_20_result)
birdlife_20_summ
```

```{r}
both_20_data <- data_for_response('both_20_perc_ratio')
both_20_result <- dredge_and_subset(both_20_data)
both_20_summ <- model_summary('both_20', 'species', both_20_result)
both_20_summ
```

```{r}
either_20_data <- data_for_response('either_20_perc_ratio')
either_20_result <- dredge_and_subset(either_20_data)
either_20_summ <- model_summary('either_20', 'species', either_20_result)
either_20_summ
```

------------------------------
Full result for 10% of species
------------------------------
```{r}
all_species_results <- full_join(full_join(merlin_20_summ, birdlife_20_summ), full_join(both_20_summ, either_20_summ))
write_csv(all_species_results, "species_analysis_result_20_perc.csv")
```

------------------------------
Comparing the 2 percentiles
------------------------------

```{r}
ggplot(merlin_10_data, aes(x = response)) + geom_bar(width = 0.1)
```

```{r}
bind_sets <- function(first_percentile, second_percentile) {
  first_percentile$percentile <- "Top 10%"
  second_percentile$percentile <- "Top 20%"
  rbind(first_percentile, second_percentile)
}

merlin_species_data <- bind_sets(merlin_10_data,  merlin_20_data)
birdlife_species_data <- bind_sets(birdlife_10_data,  birdlife_20_data)
either_species_data <- bind_sets(either_10_data,  either_20_data)
both_species_data <- bind_sets(both_10_data,  both_20_data)
```

```{r}
diff_set <- function(first_percentile, second_percentile) {
  second_percentile$response_20 <- second_percentile$response
  
  result <- right_join(first_percentile, second_percentile[,c("niche_name", "response_20")], by = c("niche_name"))
  result$diff <- result$response_20 - result$response
  result$increase <- result$diff / result$response_20
  result$increase[is.na(result$increase)] = 0
  result$response_10 <- result$response
  result[,c("response_10", "response_20", "diff", "increase", "foraging_niche", "trophic_niche", "is_nocturnal", "pc1", "pc2", "pc3", "pc4", "total_species")]
}

merlin_diff_data <- diff_set(merlin_10_data,  merlin_20_data)
birdlife_diff_data <- diff_set(birdlife_10_data,  birdlife_20_data)
either_diff_data <- diff_set(either_10_data,  either_20_data)
both_diff_data <- diff_set(both_10_data,  both_20_data)

merlin_diff_data
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
merlin_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova)

merlin_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_trophic_niche_niche_anova_i)
```

```{r}
pairwise.wilcox.test(merlin_diff_data$increase, merlin_diff_data$trophic_niche)
```
```{r}
library(multcomp)
merlin_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=merlin_diff_data)
summary(merlin_increase_tropic_niche_anova)

merlin_increase_tropic_niche_tukey <- cld(glht(merlin_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))

merlin_increase_tropic_niche_tukey
```

```{r}
plot(merlin_increase_tropic_niche_tukey)
```
```{r}
with.tukey.label.as.group <- function(tukey, dataset, join_by) {
  labels <- data.frame(tukey$mcletters$Letters)
  labels$category <- rownames(labels)
  colnames(labels) <- c("group", "category")
  
  
  left_join(dataset, labels, by = join_by)
}
```

```{r}
merlin_diff_data1 <- with.tukey.label.as.group(merlin_increase_tropic_niche_tukey, merlin_diff_data, c("trophic_niche" = "category"))
merlin_diff_data1
```
```{r}
table(merlin_diff_data1$trophic_niche)
```

```{r}
ggplot(merlin_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_trophic_niche_niche_anova <- aov(response~trophic_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova)

birdlife_trophic_niche_niche_anova_i <- aov(response~trophic_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_trophic_niche_niche_anova_i)
```

```{r}
birdlife_increase_tropic_niche_anova <-aov(increase~trophic_niche, data=birdlife_diff_data)
summary(birdlife_increase_tropic_niche_anova)

birdlife_increase_tropic_niche_tukey <- cld(glht(birdlife_increase_tropic_niche_anova, linfct=mcp(trophic_niche="Tukey")))

birdlife_increase_tropic_niche_tukey
```

```{r}
birdlife_diff_data1 <- with.tukey.label.as.group(birdlife_increase_tropic_niche_tukey, birdlife_diff_data, c("trophic_niche" = "category"))
ggplot(birdlife_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(either_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
ggplot(both_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
nrow(merlin_10_data[merlin_10_data$response > 0,])
nrow(merlin_20_data[merlin_20_data$response > 0,])
```

```{r}
nrow(birdlife_10_data[birdlife_10_data$response > 0,])
nrow(birdlife_20_data[birdlife_20_data$response > 0,])
```

```{r}
nrow(either_10_data[either_10_data$response > 0,])
nrow(either_20_data[either_20_data$response > 0,])
```

```{r}
nrow(both_10_data[both_10_data$response > 0,])
nrow(both_20_data[both_20_data$response > 0,])
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
interaction.plot(merlin_species_data$foraging_niche, merlin_species_data$percentile, merlin_species_data$response)
```

```{r}
merlin_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova)

merlin_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_foraging_niche_anova_i)
```



```{r}
ggplot(birdlife_species_data, aes(x = response, y = foraging_niche, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_foraging_niche_anova <- aov(response~foraging_niche + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova)

birdlife_foraging_niche_anova_i <- aov(response~foraging_niche * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_foraging_niche_anova_i)
```


```{r}
pc_axis_figure <- function(dataset) {
  annotate_figure(
ggarrange(
  ggplot(dataset, aes(x = pc1, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc2, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc3, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  ggplot(dataset, aes(x = pc4, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw() + rremove("ylab"),
  nrow = 2, ncol = 2, common.legend = T, label.x = 0),
left = text_grob("log(response)", rot = 90))
}
```


```{r}
pc_axis_figure(merlin_species_data)
```

```{r}
library(lme4)
merlin_pc1_model = lmer(response ~ pc1 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc1_model)
summary(merlin_pc1_model)
```

```{r}
merlin_pc2_model = lmer(response ~ pc2 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc2_model)
summary(merlin_pc2_model)
```

```{r}
merlin_pc3_model = lmer(response ~ pc3 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc3_model)
summary(merlin_pc3_model)
```

```{r}
merlin_pc4_model = lmer(response ~ pc4 * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_pc4_model)
summary(merlin_pc4_model)
```

```{r}
pc_axis_figure(birdlife_species_data)
```

```{r}
ggplot(merlin_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
merlin_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova)

merlin_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=merlin_species_data)
summary(merlin_nocturnal_anova_i)
```

```{r}
ggplot(birdlife_species_data, aes(x = response, y = is_nocturnal, color = percentile)) + geom_boxplot() + theme_bw()
```

```{r}
birdlife_nocturnal_anova <- aov(response~is_nocturnal + percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova)

birdlife_nocturnal_anova_i <- aov(response~is_nocturnal * percentile + Error(niche_name), data=birdlife_species_data)
summary(birdlife_nocturnal_anova_i)
```

-----------------------------------------
Investigation of percentiles and presence
-----------------------------------------

```{r}
ggplot(merlin_species_data, aes(x = total_species, y = log(response), color = percentile)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm", se = FALSE) + theme_bw()
```

```{r}
merlin_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_species_count_model)
summary(merlin_species_count_model)
```

```{r}
birdlife_species_count_model = lmer(response ~ total_species * percentile + (1|niche_name), data=birdlife_species_data)
anova(birdlife_species_count_model)
summary(birdlife_species_count_model)
```

```{r}
merlin_species_data$present <- merlin_species_data$response > 0
```

```{r}
ggplot(merlin_species_data, aes(x = log(total_species), y = present, color = percentile)) + geom_boxplot() + theme_bw()
```


```{r}
merlin_present_model = lmer(present ~ log(total_species) * percentile + (1|niche_name), data=merlin_species_data)
anova(merlin_present_model)
summary(merlin_present_model)
```

```{r}
assign_niche_appearance_factor <- function(dataset) {
  dataset$niche_appearance = '> 20%'
  dataset$niche_appearance[dataset$response_20 > 0] = '10% - 20%'
  dataset$niche_appearance[dataset$response_10 > 0] = '< 10%'
  
  dataset$niche_appearance <- factor(dataset$niche_appearance, levels = c("< 10%", "10% - 20%", "> 20%"))
  dataset
}
```

```{r}
merlin_diff_data <- assign_niche_appearance_factor(merlin_diff_data)
```


```{r}
ggplot(merlin_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + 
    geom_bar()
```

```{r}
load_ever_data <- function(pool) {
  response_name <- paste(pool, 'ever', 'perc', 'ratio', sep = '_')
  dataset <- data_for_response(response_name)
  dataset$present <- 'Yes'
  dataset$present[dataset$response == 0] <- 'No'
  dataset$present <- factor(dataset$present, levels = c("Yes", "No"))
  dataset
}
```

```{r}
merlin_ever_data <- load_ever_data('merlin')
merlin_ever_data
```

```{r}
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + 
    geom_bar()
```
```{r}
birdlife_diff_data <- assign_niche_appearance_factor(birdlife_diff_data)
```


```{r}
ggplot(birdlife_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + 
    geom_bar()
```


```{r}
birdlife_ever_data <- load_ever_data('birdlife')
```

```{r}
ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + 
    geom_bar()
```

----------------------------------
PCA Variation with presence
----------------------------------


```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc1, y = trophic_niche)) + 
    geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
    theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
    guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
    ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Merlin Regional Pool")
ggsave("species_analysis_pc1_merlin.jpg")
```
```{r}
ggplot(merlin_diff_data, aes(color = niche_appearance, x = pc1, y = trophic_niche)) + 
    geom_violin()
```


```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc2, y = trophic_niche)) + 
    geom_violin()
```
```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc3, y = trophic_niche)) + 
    geom_violin()
```
```{r}
ggplot(merlin_ever_data, aes(color = present, x = pc4, y = trophic_niche)) + 
    geom_violin()
```


```{r}
ggplot(birdlife_ever_data, aes(color = present, x = pc1, y = trophic_niche)) + 
    geom_violin() + geom_jitter(aes(group=present, size = total_species), position=position_jitterdodge()) + theme_bw() +
    theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(1,"line")) +
    guides(color=guide_legend(title="Present in any city?"), size=guide_legend(title="Niche size")) +
    ylab("Trophic Niche") + xlab("PC1: Body size") + labs(title = "Birdlife Regional Pool")
ggsave("species_analysis_pc1_birdlife.jpg")
```
----------------------------------
First attempt figures
----------------------------------

```{r}
theme <-  theme_bw() + theme(legend.position="bottom", legend.title=element_text(size=9), legend.text=element_text(size=8), legend.key.size = unit(0.5,"line"))
guide_ever <- guides(fill=guide_legend(title="Present in any city?"))
guide_percentile <- guides(fill=guide_legend(title="Top X% of ranked species", nrow = 3))

species_plot1 <- annotate_figure(
  ggarrange(
    ggplot(merlin_ever_data, aes(fill=present, y=trophic_niche)) + geom_bar() + theme + guide_ever + rremove("xlab") + rremove("ylab") + labs(title = "Merlin"),
    ggplot(birdlife_ever_data, aes(fill=present, y=trophic_niche)) + geom_bar() + theme + guide_ever + rremove("xlab") + rremove("ylab") + labs(title = "Birdlife"),
    ggplot(merlin_diff_data, aes(fill=niche_appearance, y=trophic_niche)) +  geom_bar()+ theme + guide_percentile + rremove("xlab") + rremove("ylab"),
    ggplot(birdlife_diff_data, aes(fill=niche_appearance, y=trophic_niche)) + geom_bar() + theme + guide_percentile + rremove("xlab") + rremove("ylab"),
    nrow = 2, ncol = 2, common.legend = F, label.x = 0, labels = c("a)", "", "b)", "")),
left = text_grob("Trophic Niche", rot = 90),
bottom = text_grob("Niche Count"))

species_plot1
```
a) Niches recorded present in at least one city.
b) Given species ranked by percentage of cities populated given they are present in regional pool; This shows niches present with the top 10% of the most urbanised species (red), the top 20% of the most urbanised species (red + green), and then the remaining niches (blue). This approximates to the niches that would be present in the bottom 25% performing hotspots (red), the bottom 75% performing hotspots (red + green), and then the niches at the highest performing hotspots or niches that are never present (blue).

```{r}
guide_percentile_2 <- guides(color=guide_legend(title="Percentile"))
guide_increase <- guides(color=guide_legend(title="Tukey response group", nrow = 2))

species_plot2 <- annotate_figure(
  ggarrange(
    ggplot(merlin_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme  + guide_percentile_2 + xlab("Fullness") + rremove("ylab") + labs(title = "Merlin"),
    ggplot(birdlife_species_data, aes(x = response, y = trophic_niche, color = percentile)) + geom_boxplot() + theme + guide_percentile_2 + xlab("Fullness") + rremove("ylab") + labs(title = "Birdlife"),
    ggplot(merlin_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme + guide_increase + xlab("Increase in fullness") + rremove("ylab"),
    ggplot(birdlife_diff_data1, aes(x = increase, y = trophic_niche, color = group)) + geom_boxplot() + theme + guide_increase + xlab("Increase in fullness") + rremove("ylab"), 
    nrow = 2, ncol = 2, common.legend = F, label.x = 0, labels = c("c)", "", "d)", "")),
left = text_grob("Trophic Niche", rot = 90))

species_plot2
```
c) The redundancy/fullness of the niches that are present in cities with 10% of the most urbanised species, and the redundancy/fullness of the niches that are present in cities with 20% of the most urbanised species.
d) The percentage increase in redundancy/fullness of niches between the top 10% of most urbanised species and the top 20% of most urbanised species, grouped using Tukey based on an anova of the percentage increase given the tropic niche. This shows the amount of redundancy that is increased from the bottom 25% performing hotspots (hotspots with 10% of regional pool) and the bottom 75% of performing hotspots (hotspots with 20% of regional pool).


```{r}
jpeg("species_plot1.jpg", width = 800, height = 600)
species_plot1
dev.off()

jpeg("species_plot2.jpg", width = 800, height = 600)
species_plot2
dev.off()
```

----------------------------------
Trophic Niche Accumulation
----------------------------------

```{r}
accumulation <- read_csv('trophic_niche_accumulation.csv')
accumulation
```
```{r}
merlin_accumulation = ggplot(accumulation, aes(x = percent, y = merlin_niche_count)) + 
  geom_line(linetype = "dotted") + 
  geom_line(aes(linetype = merlin_remaining_species), na.value = "dash") +
  scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
  geom_hline(data = . %>% group_by(trophic_niche) %>% filter(merlin_niche_count == max(merlin_niche_count)), aes(yintercept = merlin_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
  theme_bw() +
  theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6),  axis.text.y = element_text(size = 6)) +
  facet_wrap (~ trophic_niche) +
  xlab("Percentage of all Species") + 
  ylab("Number of Niches") +
  labs(title = "Merlin Niche Accumulation")

merlin_accumulation
```


```{r}
birdlife_accumulation = ggplot(accumulation, aes(x = percent, y = birdlife_niche_count)) + 
  geom_line(linetype = "dotted") + 
  geom_line(aes(linetype = birdlife_remaining_species), na.value = "dash") +
  scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
  geom_hline(data = . %>% group_by(trophic_niche) %>% filter(birdlife_niche_count == max(birdlife_niche_count)), aes(yintercept = birdlife_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
  theme_bw() +
  theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6),  axis.text.y = element_text(size = 6)) +
  facet_wrap (~ trophic_niche) +
  xlab("Percentage of all Species") + 
  ylab("Number of Niches") +
  labs(title = "Birdlife Niche Accumulation")

birdlife_accumulation
```
```{r}
annotate_figure(
  ggarrange(merlin_accumulation + rremove("xlab") + rremove("ylab"), 
            birdlife_accumulation + rremove("xlab") + rremove("ylab"),
            ncol = 2, label.x = 0),
left = text_grob("Number of Niches Present", rot = 90),
bottom = text_grob("Percentage of all Species - ranked by urbanisation"))


ggsave("trophic_niche_accumlation.jpg")
```
```{r}
by_max_birdlife_niche_count = accumulation %>% group_by(trophic_niche) %>% summarise(Value = max(birdlife_niche_count))
by_max_birdlife_niche_count[order(-by_max_birdlife_niche_count$Value),]
```

```{r, fig.height = 12}
ggplot(accumulation, aes(x = percent, y = birdlife_niche_count)) + 
  geom_line(linetype = "dotted") + 
  geom_line(aes(linetype = birdlife_remaining_species), na.value = "dash") +
  scale_linetype_manual(values=c("blank", "solid"), guide = "none") +
  geom_hline(data = . %>% group_by(trophic_niche) %>% filter(birdlife_niche_count == max(birdlife_niche_count)), aes(yintercept = birdlife_niche_count), color = "blue", size = 0.25, linetype = "dashed") +
  theme_bw() +
  theme(legend.position = "bottom", strip.text = element_text(size = 6), axis.text.x = element_text(angle = 90, size = 6),  axis.text.y = element_text(size = 6)) +
  facet_wrap (~ factor(trophic_niche, levels = c("Invertivore", "Aquatic predator", "Omnivore", "Vertivore", "Frugivore", "Herbivore aquatic", "Granivore", "Herbivore terrestrial", "Nectarivore", "Scavenger")), scales = "free") +
  xlab("Percentage of all Species") + 
  ylab("Number of Niches") +
  labs(title = "Birdlife Niche Accumulation") +
  xlim(0, 50)
```









